set.seed(1)
library(splines)
library(ggplot2)
library(ggtree)
#> ggtree v3.2.1 For help: https://yulab-smu.top/treedata-book/
#>
#> If you use ggtree in published research, please cite the most appropriate paper(s):
#>
#> 1. Guangchuang Yu. Using ggtree to visualize data on tree-like structures. Current Protocols in Bioinformatics. 2020, 69:e96. doi:10.1002/cpbi.96
#> 2. Guangchuang Yu, Tommy Tsan-Yuk Lam, Huachen Zhu, Yi Guan. Two methods for mapping and visualizing associated data on phylogeny using ggtree. Molecular Biology and Evolution. 2018, 35(12):3041-3043. doi:10.1093/molbev/msy194
#> 3. Guangchuang Yu, David Smith, Huachen Zhu, Yi Guan, Tommy Tsan-Yuk Lam. ggtree: an R package for visualization and annotation of phylogenetic trees with their covariates and other associated data. Methods in Ecology and Evolution. 2017, 8(1):28-36. doi:10.1111/2041-210X.12628
library(reshape2)
library(CostOfResistance)
#> Warning: replacing previous import 'ape::drop.tip' by 'treeio::drop.tip' when
#> loading 'CostOfResistance'
library(ape)
#>
#> Attaching package: 'ape'
#> The following object is masked from 'package:ggtree':
#>
#> rotate
library(posterior)
#> This is posterior version 1.2.1
#>
#> Attaching package: 'posterior'
#> The following objects are masked from 'package:stats':
#>
#> mad, sd, var
library(cmdstanr)
#> This is cmdstanr version 0.5.2
#> - CmdStanR documentation and vignettes: mc-stan.org/cmdstanr
#> - CmdStan path: /home/david/.cmdstan/cmdstan-2.30.1
#> - CmdStan version: 2.30.1
#>
#> A newer version of CmdStan is available. See ?install_cmdstan() to install it.
#> To disable this check set option or environment variable CMDSTANR_NO_VER_CHECK=TRUE.
library(patchwork)
library(viridis)
#> Loading required package: viridisLite
library(RColorBrewer)
library(latex2exp)
run_mcmc <- F
Honestly wish my computer could just read my mind and do what I want, automatically
tr <- read.tree(system.file("extdata", "grad2016.nwk", package="CostOfResistance", mustWork=T))
tr <- makeNodeLabel(tr)
meta <- read.table(system.file("extdata", "grad2016.tab", package="CostOfResistance", mustWork=T),
sep='\t',comment.char='',header=T,as.is=T)
rownames(meta) <- meta$ID
cutoffs <- read.csv(system.file("extdata", "cutoffs.csv", package="CostOfResistance", mustWork=T))
Dichotomise Resistance data
rdf <- data.frame(ID=meta$ID)
rownames(rdf) <- rownames(meta)
abx_names <- data.frame(cutoff_name = c("CFX", "CIP", "CRO", "AZI", "PEN", "TET", "SPC"),
meta_name=c("CFX", "CIP", "CRO", "AZI", "PEN", "TET", "SPC")
)
for (s in c(1:nrow(abx_names))) {
rdf[[abx_names$cutoff_name[s]]] <- (meta[[abx_names$meta_name[s]]] > cutoffs[[abx_names$cutoff_name[s]]][2])
}
rdf<-within(rdf, rm(ID))
Select the three lineages of interest
CL_1 <- extract.clade(tr, "Node129")$tip.label
CL_2 <- extract.clade(tr, "Node580")$tip.label
CL_3 <- extract.clade(tr, "Node1038")$tip.label
tr_r <- keep.tip(tr, c(CL_1,CL_2,CL_3))
#Remove CFX resistant subclade
tr_r <- drop.tip(tr_r, extract.clade(tr_r, "Node732")$tip.label)
tr_r <- ladderize(tr_r)
p <- ggtree(tr_r) + geom_nodelab(geom='text',size=4) +
theme_tree2()
gheatmap(p, rdf, offset=8, width=0.6,
colnames=FALSE, legend_title="Resistance")+
scale_x_ggtree()
#> Scale for 'y' is already present. Adding another scale for 'y', which will
#> replace the existing scale.
We are interested in the two lineages within the subtree with root at Node125
cl_dat <- data.frame(node=nodeid(tr_r, c("Node580", "Node129", "Node1038")), name=c("FQ Susceptible", "FQ Resistant 1","FQ Resistant 2"))
mrsd_full <- max(meta[tr_r$tip.label, "Year"])
p <- ggtree(tr_r, mrsd=paste0(mrsd_full,"-01-01")) +
geom_cladelab(
data = cl_dat,
mapping = aes(
node = node,
label = name,
color = name
),
fontsize = 3,
angle=90,
align = TRUE,
vjust = 0.0,
hjust = "center",
offset= 10,
lwd=4.0,
show.legend = FALSE
) +
scale_color_brewer(palette="Dark2") +
theme_tree2()
h1 <- gheatmap(p, rdf[,c("CIP", "AZI", "PEN", "TET")], width=0.2, colnames=FALSE, legend_title="Resistance")+
scale_fill_manual(values=c("blue", "red")) +
guides(color = "none", fill=guide_legend("MIC >= Cutoff")) +
scale_x_ggtree() +
theme(legend.position="bottom",
axis.text.x = element_text(angle = 45, vjust = 0.5, hjust=1))
#> Scale for 'y' is already present. Adding another scale for 'y', which will
#> replace the existing scale.
#> Scale for 'fill' is already present. Adding another scale for 'fill', which
#> will replace the existing scale.
h1
pdf("Figures/gono_tree_90.pdf",8,8)
plot(h1)
dev.off()
#> png
#> 2
Node580 corresponds to FQ susceptible, Node129 is FQ resistant Extract the two lineages, filter out tips with missing FQ profiles,or that are FQ resistant and in the susceptible clade and vice versa Further remove all tips that are CIP resistant (mostly a small subclade in the susceptible tree)
sus_tr <- extract.clade(tr_r, "Node580")
res_tr_1 <- extract.clade(tr_r, "Node129")
res_tr_2 <- extract.clade(tr_r, "Node1038")
rdf_sus <- rdf[sus_tr$tip.label,]
rdf_res_1 <- rdf[res_tr_1$tip.label,]
rdf_res_2 <- rdf[res_tr_2$tip.label,]
tip_times_sus <- meta[rownames(rdf_sus),]$Year
tip_times_res_1 <- meta[rownames(rdf_res_1),]$Year
tip_times_res_2 <- meta[rownames(rdf_res_2),]$Year
mrsd_sus <- max(tip_times_sus)
mrsd_res_1 <- max(tip_times_res_1)
mrsd_res_2 <- max(tip_times_res_2)
Plot the filtered subtrees
p1 <- ggtree(sus_tr, mrsd=paste0(mrsd_sus,"-01-01")) +
geom_tiplab(size=1, align=TRUE, linesize=.5)+
scale_x_ggtree() +
theme_tree2()
gheatmap(p1, rdf, offset=8, width=0.6,
colnames=FALSE, legend_title="Resistance")+
scale_fill_manual(values=c("blue", "red"))+
labs(title="Susceptible Tree") +
guides(color = "none", fill=guide_legend("MIC >= Cutoff")) +
scale_x_ggtree()
#> Scale for 'y' is already present. Adding another scale for 'y', which will
#> replace the existing scale.
#> Scale for 'fill' is already present. Adding another scale for 'fill', which
#> will replace the existing scale.
#> Scale for 'x' is already present. Adding another scale for 'x', which will
#> replace the existing scale.
p2 <-ggtree(res_tr_1, mrsd=paste0(mrsd_res_1,"-01-01"))+theme_tree2() +
geom_tiplab(size=1, align=TRUE, linesize=.5)+
scale_x_ggtree() +
theme_tree2()
gheatmap(p2, rdf, offset=8, width=0.6,
colnames=FALSE, legend_title="Resistance")+
scale_fill_manual(values=c("blue", "red"))+
labs(title="Resistant Tree") +
guides(color = "none", fill=guide_legend("MIC >= Cutoff")) +
scale_x_ggtree()
#> Scale for 'y' is already present. Adding another scale for 'y', which will
#> replace the existing scale.
#> Scale for 'fill' is already present. Adding another scale for 'fill', which
#> will replace the existing scale.
#> Scale for 'x' is already present. Adding another scale for 'x', which will
#> replace the existing scale.
p2 <-ggtree(res_tr_2, mrsd=paste0(mrsd_res_2,"-01-01"))+theme_tree2() +
geom_tiplab(size=1, align=TRUE, linesize=.5)+
scale_x_ggtree() +
theme_tree2()
gheatmap(p2, rdf, offset=8, width=0.6,
colnames=FALSE, legend_title="Resistance")+
scale_fill_manual(values=c("blue", "red"))+
labs(title="Resistant Tree") +
guides(color = "none", fill=guide_legend("MIC >= Cutoff")) +
scale_x_ggtree()
#> Scale for 'y' is already present. Adding another scale for 'y', which will
#> replace the existing scale.
#> Scale for 'fill' is already present. Adding another scale for 'fill', which
#> will replace the existing scale.
#> Scale for 'x' is already present. Adding another scale for 'x', which will
#> replace the existing scale.
Set the analysis end date, move tips into the middle of the year as opposed to the beginning by offsetting most recent sampling date by +0.5
t_end <- max(mrsd_res_1, mrsd_res_2, mrsd_sus)+1
mr_sus <- t_end-mrsd_sus-0.5
mr_res_1 <- t_end-mrsd_res_1-0.5
mr_res_2 <- t_end-mrsd_res_2-0.5
print(t_end)
#> [1] 2014
Load and preprocess usage data
pl_begin <- 1988
# AZI, PEN, TET, CEF, FQ
dat <- system.file("extdata", "usage-clean.csv", package = "CostOfResistance", mustWork = TRUE)
abx_raw <- as.matrix(read.csv(dat, header=F))
abx_df <- as.data.frame(t(abx_raw))
colnames(abx_df) <- c("year",abx_df[1,2:14])
abx_df <- abx_df[-1,]
rownames(abx_df) <- as.integer(abx_df[,1])
abx_df$year <- as.integer(abx_df$year)
abx_df$FQ <- as.numeric(abx_df[, "Ciprofloxacin"]) + as.numeric(abx_df[, "Ofloxacin"])
abx_df$CPH <- as.numeric(abx_df[, "Cefixime"]) + as.numeric(abx_df[, "Ceftriaxone 250 mg"]) + as.numeric(abx_df[, "Ceftriaxone 125 mg"]) +as.numeric(abx_df[, "Other Cephalo."])
abx_df$Other <- 100-as.numeric(abx_df[, "FQ"]) -
as.numeric(abx_df[, "CPH"]) -
as.numeric(abx_df[, "Penicillins"]) -
as.numeric(abx_df[, "Azithromycin 2gm"]) -
as.numeric(abx_df[, "Tetracyclines"])
usage_f_fq <- yearly_usg_stepfunc(as.numeric(abx_df[sapply(c(pl_begin:t_end), paste0),"FQ"]), c(pl_begin:t_end))
usage_f_pen <- yearly_usg_stepfunc(as.numeric(abx_df[sapply(c(pl_begin:t_end), paste0),"Penicillins"]), c(pl_begin:t_end))
usage_f_tet <- yearly_usg_stepfunc(as.numeric(abx_df[sapply(c(pl_begin:t_end), paste0),"Tetracyclines"]), c(pl_begin:t_end))
usage_f_azi <- yearly_usg_stepfunc(as.numeric(abx_df[sapply(c(pl_begin:t_end), paste0),"Azithromycin 2gm"]), c(pl_begin:t_end))
usage_f_CPH <- yearly_usg_stepfunc(as.numeric(abx_df[sapply(c(pl_begin:t_end), paste0),"CPH"]), c(pl_begin:t_end))
usage_f_other <- yearly_usg_stepfunc(as.numeric(abx_df[sapply(c(pl_begin:t_end), paste0),"Other"]), c(pl_begin:t_end))
time_grid <- seq(from=pl_begin, to=t_end, length.out=4000)
fq_df <- data.frame(year=rep(time_grid,6),
usage=c(sapply(time_grid,usage_f_fq),
sapply(time_grid,usage_f_pen),
sapply(time_grid,usage_f_tet),
sapply(time_grid,usage_f_azi),
sapply(time_grid,usage_f_CPH),
sapply(time_grid,usage_f_other)),
abx=c(rep("Fluoroquinolones",length(time_grid)),
rep("Penicillins", length(time_grid)),
rep("Tetracyclines",length(time_grid)),
rep("Azithromycin", length(time_grid)),
rep("Cephalosporins", length(time_grid)),
rep("Other", length(time_grid))))
Plot usage data, plot individual areas for AMRs that the lineages carry resistance against
p <- ggplot(fq_df, aes(x=year, y=usage, fill=factor(abx, levels=rev(c("Fluoroquinolones", "Penicillins","Tetracyclines", "Cephalosporins", "Azithromycin", "Other"))))) +
geom_area() +
scale_x_continuous(breaks = seq(pl_begin, t_end, by =1)) +
labs(x="Year", y="Average Usage as Proportion of Primary Treatment") +
guides(fill = guide_legend(title="Antimicrobial",reverse=F)) +
scale_fill_viridis(discrete=T) +
scale_y_continuous(labels = scales::percent_format(scale = 1), breaks=seq(from=0,to=100, by=10)) +
geom_vline(xintercept=1995, linetype="longdash", color="gray70",alpha=.8)+
geom_text(aes(x=1995, label="\nAnalysis Start Date", y=40), size=rel(4.0), colour="gray70", alpha=.8, angle=90)+
theme_minimal() +
theme(axis.text.x=element_text(size=rel(0.7), angle = 45, vjust=0.5, hjust=0.5),
axis.text.y=element_text(size=rel(0.7), hjust=1),
axis.title.y=element_text(size=rel(0.6)),
axis.title.x=element_text(size=rel(0.6)),
aspect.ratio=1,
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(size=rel(0.2), colour = "grey80"),
legend.position="right",
legend.justification="left",
legend.margin=margin(0,0,0,0),
legend.box.margin=margin(0,0,0,-10))
pl1 <- h1 + annotation_custom(
grob = ggplotGrob(p),
xmin = 1795,
xmax = 1995,
ymin = 190,
ymax = 410)
pdf("Figures/gono_data.pdf",6,6)
pl1
dev.off()
#> png
#> 2
We can see that PEN + TET was used in significant quantities before ~1995. The FQ resistant lineage carries resistance against these as well, but susceptible clade does not. Set the analysis start date to 1995. Subset FQ usage data for the given timespan.
t_begin <- 1995
usage_df <- data.frame(year=c(t_begin:t_end), usage=as.numeric(abx_df[sapply(c(t_begin:t_end), paste0),"FQ"]))
usage_df$usage <- usage_df$usage/100
gamma_ansatz <- 1/90.0
time_scale <- 365.0
t0 <- t_begin #0
tmax <- t_end #t_end - t_begin
usage_df$time #<- usage_df$time - t_begin
#> NULL
stopifnot(all(usage_df$time<=tmax))
stopifnot(all(usage_df$time>=t0))
Sample
out <- infer_costs2(list(sus_tr, res_tr_1, res_tr_2),
list(mr_sus,mr_res_1, mr_res_2),
usage_df$usage,
usage_df$year,
t0,
tmax,
gamma_ansatz,
time_scale,
n_iter=2000,
n_warmup=2000,
model="deterministic",
K=60,
L=6.5,
seed=1345678,
gamma_log_sd = 0.1,
stan_control=list(adapt_delta=.99,
max_treedepth=13,
parallel_chains=4,
chains=4,
refresh=1000
))
saveRDS(out, "~/gono_90s.rds")
Load existing analysis result
out <- readRDS("~/gono_90s.rds")
Check convergence
print(out$converged)
#> [1] TRUE
source("Figures/figure3.R")
pdf("Figures/figure3_90.pdf",6,6)
plot(fig3)
dev.off()
#> png
#> 2
fig3
fig4 <- plot_qpairs(out)
pdf("Figures/figure4_90.pdf",6,6)
plot(fig4)
dev.off()
#> png
#> 2
fig4
source("Figures/figure5.R")
#> Warning: Removed 8544 rows containing non-finite values (stat_contour_filled).
#> Warning: Removed 8922 rows containing non-finite values (stat_contour_filled).
#> Warning: Removed 8544 rows containing non-finite values (stat_contour_filled).
#> Warning: Removed 8922 rows containing non-finite values (stat_contour_filled).
#> Scale for 'y' is already present. Adding another scale for 'y', which will
#> replace the existing scale.
#> Scale for 'y' is already present. Adding another scale for 'y', which will
#> replace the existing scale.
#> Warning: Removed 8544 rows containing non-finite values (stat_contour_filled).
#> Warning: Removed 8856 rows containing non-finite values (stat_contour_filled).
#> Warning: Removed 8544 rows containing non-finite values (stat_contour_filled).
#> Warning: Removed 8856 rows containing non-finite values (stat_contour_filled).
#> Scale for 'x' is already present. Adding another scale for 'x', which will
#> replace the existing scale.
#> Scale for 'x' is already present. Adding another scale for 'x', which will
#> replace the existing scale.
pdf("Figures/figure5_90.pdf",6,6)
plot(fig5)
#> Warning: Removed 8544 rows containing non-finite values (stat_contour_filled).
#> Warning: Removed 8544 rows containing non-finite values (stat_contour_filled).
dev.off()
#> png
#> 2
fig5
#> Warning: Removed 8544 rows containing non-finite values (stat_contour_filled).
#> Removed 8544 rows containing non-finite values (stat_contour_filled).
s4 <- plot_traces(out)
pdf("Figures/s4_90.pdf",12,12)
plot(s4)
dev.off()
#> png
#> 2
s4
s5a <- plot_ppcheck_At(out, 1)
s5b <- plot_ppcheck_At(out, 2)
s5c <- plot_ppcheck_At(out, 3)
pdf("Figures/s5_90.pdf",6,6)
plot(s5a)
plot(s5b)
plot(s5c)
dev.off()
#> png
#> 2
s5a
s5b
s5c
s6 <- plot_hyperpar_pairs(out)
pdf("Figures/s6_90.pdf",6,6)
plot(s6)
dev.off()
#> png
#> 2
s6
s7a <- plot_epi_dynamic(out,1)
s7b <- plot_rt(out,1)
pdf("Figures/s7_90.pdf",6,6)
plot(s7a)
plot(s7b)
dev.off()
#> png
#> 2
s7a
s7b